Test Queries: Added self-join test cases. Also added Vladimir's method, which
requires the following code to be run first:

begin work;

create function var_set (text, text) returns text as $$
    select set_config('public.' || $2 || pg_backend_pid(), $1, false);
$$ LANGUAGE 'sql';

create function var_get (text) returns text as $$
    select current_setting('public.' || $1 || pg_backend_pid());
$$ LANGUAGE 'sql';

create operator >>> (procedure = var_set, leftarg = text, rightarg = text);
create operator <<< (procedure = var_get, rightarg = text);

commit;

The above code exploits custom GUC variables to carry the lagged value from
one row to the next. Clever, but dirty :-)
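For reference, a minimal sketch of how the two operators behave (the variable
name 'demo' is arbitrary): >>> stores its left argument in a session-local
custom GUC variable named on the right and returns it, and <<< reads the
variable back. This assumes custom_variable_classes = 'public' is set in
postgresql.conf, as noted in the tests below.

select 'hello' >>> 'demo';  -- stores 'hello' in public.demo<pid>, returns 'hello'
select <<<'demo';           -- reads it back: 'hello'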
----------------------------------------------------------------------------
-- Test 1
--
--
-- Notes: Get the name, department and salary of the highest paid member of
--        each department. If the two highest paid employees in a department
--        have the same salary, show the one with the lowest id.
----------------------------------------------------------------------------

create table employees (
  id INT primary key,
  name varchar(30) not null,
  department varchar(30) not null,
  salary int not null,
  check (salary >= 0)
);

insert into employees values(1,'Jeff','IT',10000);
insert into employees values(2,'Sam','IT',12000);
insert into employees values(3,'Richard','Manager',30000);
insert into employees values(4,'Ian','Manager',20000);
insert into employees values(5,'John','IT',60000);
insert into employees values(6,'Matthew','Director',60000);

VACUUM ANALYZE employees;

-- Normal way.
SELECT name,department,salary
FROM employees AS e
WHERE id = (SELECT id
            FROM employees
            WHERE department = e.department
            ORDER BY salary DESC,id ASC
            LIMIT 1);

-- above query = 498 tps

-- With windowing functions.
SELECT name,department,salary
FROM (SELECT name,
             department,
             salary,
             row_number() OVER (PARTITION BY department
                                ORDER BY salary DESC,id ASC) AS num
      FROM employees
     ) AS t
WHERE num = 1;

-- above query = 578 tps
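-- As a sanity check, both queries above should return one row per department
-- for the sample data (the outer queries have no ORDER BY, so row order may vary):
--
--    name   | department | salary
--  ---------+------------+--------
--   John    | IT         |  60000
--   Richard | Manager    |  30000
--   Matthew | Director   |  60000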
----------------------------------------------------------------------------
-- Test 2 -- Split times problem
--
-- Notes: Find the interval of time between each timestamp and the previous
--        timestamp, the previous timestamp being the one with the next
--        lowest id value. If there is no previous timestamp, show NULL.

CREATE TABLE tstest (
  id SERIAL NOT NULL PRIMARY KEY,
  timestamp TIMESTAMP NOT NULL
);

-- Seed one row, then repeatedly add a row 0-100 seconds after the latest one.
INSERT INTO tstest (timestamp) VALUES(NOW());
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;
INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest;

VACUUM ANALYZE tstest;

-- Sub query
SELECT timestamp - (SELECT timestamp
                    FROM tstest
                    WHERE id < t.id
                    ORDER BY id DESC
                    LIMIT 1)
FROM tstest t;

-- above query = 336 tps

-- Self join (breaks requirements! -> requires a gapless sequence of ids)
SELECT t1.timestamp - t2.timestamp
FROM tstest t1
LEFT OUTER JOIN tstest t2 ON t2.id + 1 = t1.id;

-- above query = 424 tps

-- Vladimir's method.
select '' >>> 'lag';

select timestamp::timestamp - (case lag when '' then null else lag end)::timestamp
from (select <<<'lag' as lag,
             timestamp::text >>>'lag' as timestamp
      from tstest
      order by id
     ) as t;

-- above query = 182.78 tps

-- With windowing functions.
SELECT timestamp - LAG(timestamp,1) OVER (ORDER BY id)
FROM tstest;

-- above query = 481 tps
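-- Aside (not part of the timed tests): LAG also takes an optional third
-- argument giving a default to return when there is no previous row, and the
-- default is evaluated at the current row. The requirement above asks for
-- NULL, but if a zero interval were wanted for the first row instead,
-- something like this should work:
SELECT timestamp - LAG(timestamp, 1, timestamp) OVER (ORDER BY id)
FROM tstest;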
------------------------------------------------------------------------------------------
-- Test 3 -- Meter reading problem
--
-- Notes: Some testing with slightly larger tables.
--        Find the day where the least units were used. This will be the smallest
--        difference between that day's reading and the previous meter reading. Note
--        that the previous reading does not have to be from the previous day; it's
--        possible that the meter was not read on some days.

CREATE TABLE meter_readings (
  date DATE NOT NULL PRIMARY KEY,
  reading DOUBLE PRECISION NOT NULL
);

-- Create an initial meter reading of 0 for 1st Jan 2000.
INSERT INTO meter_readings (date,reading) VALUES('2000-01-01',0);

-- Create a reading for each of the 10000 days after 2000-01-01.
INSERT INTO meter_readings (date,reading)
SELECT '2000-01-01'::date + i.a,i.a * 10000 + RANDOM() * 10000
FROM generate_series(1,10000) AS i(a);

VACUUM ANALYZE meter_readings;

-- Sub query test
SELECT date,reading - (SELECT reading
                       FROM meter_readings
                       WHERE date < p.date
                       ORDER BY date DESC
                       LIMIT 1) AS used
FROM meter_readings p
ORDER BY used ASC NULLS LAST
LIMIT 1;

-- above query = 1.3 tps.

-- Self join (breaks requirements! requires the meter to be read daily!!)
SELECT t1.date,t1.reading - t2.reading AS used
FROM meter_readings t1
LEFT OUTER JOIN meter_readings t2 ON t2.date + 1 = t1.date
ORDER BY used ASC NULLS LAST
LIMIT 1;

-- above query = 7.59 tps

-- Vladimir's method.
-- edit postgresql.conf -> custom_variable_classes = 'public'
select '' >>> 'lag';

select date,
       reading::numeric - (case lag when '' then null else lag end)::numeric as used
from (select date,
             <<<'lag' as lag,
             reading::text >>>'lag' as reading
      from meter_readings
      order by date
     ) as t
order by used asc nulls last
limit 1;

-- above query = 1.9 tps

-- Windowing functions
SELECT date,reading - LAG(reading,1) OVER (ORDER BY date) AS used
FROM meter_readings
ORDER BY used ASC NULLS LAST
LIMIT 1;

-- above query = 8.45 tps.

/*
   Notes: There seems to be a small issue with the above query.

   david=# explain SELECT date,reading - LAG(reading,1) OVER (ORDER BY date) AS used
   david-# FROM meter_readings
   david-# ORDER BY used ASC NULLS LAST LIMIT 1;
                                            QUERY PLAN
   ---------------------------------------------------------------------------------------------
    Limit  (cost=989.49..989.49 rows=1 width=12)
      ->  Sort  (cost=989.49..1014.49 rows=10001 width=12)
            Sort Key: ((reading - lag(reading, (1))))
            ->  Window  (cost=814.47..939.48 rows=10001 width=12)
                  ->  Sort  (cost=814.47..839.47 rows=10001 width=12)
                        Sort Key: date
                        ->  Seq Scan on meter_readings  (cost=0.00..150.01 rows=10001 width=12)

   The planner uses a seq scan on meter_readings, then performs a sort on date.
   Even if I do SET enable_seqscan = off; the planner still performs a seq scan.
   Why can't the executor use an index scan on the primary key?
*/

------------------------------------------------------------------------------------------
-- Test 4 -- Marathon results problem
--
-- Notes: Show the runnerid, name, time taken and position of each marathon runner.

CREATE TABLE marathon_results (
  runnerid SERIAL NOT NULL PRIMARY KEY,
  name VARCHAR(30) NOT NULL,
  time INTERVAL NOT NULL
);

INSERT INTO marathon_results (name,time) VALUES('Samuel','02:05:24');
INSERT INTO marathon_results (name,time) VALUES('Paul','02:04:55');
INSERT INTO marathon_results (name,time) VALUES('Haile','02:03:59');
INSERT INTO marathon_results (name,time) VALUES('Martin','02:05:15');
INSERT INTO marathon_results (name,time) VALUES('Sammy','02:04:56');
INSERT INTO marathon_results (name,time) VALUES('David','03:15:56');
INSERT INTO marathon_results (name,time) VALUES('Eric','03:15:56');
INSERT INTO marathon_results (name,time) VALUES('Alan','03:24:56');

VACUUM ANALYZE marathon_results;

-- Sub query
SELECT runnerid,name,time,(SELECT COUNT(DISTINCT time)+1
                           FROM marathon_results
                           WHERE time < r.time) AS position
FROM marathon_results r
ORDER BY position;

-- above query = 424 tps

-- Self join
SELECT t1.runnerid,t1.name,t1.time,COUNT(DISTINCT t2.time) + 1 AS position
FROM marathon_results t1
LEFT OUTER JOIN marathon_results t2 ON t2.time < t1.time
GROUP BY t1.runnerid,t1.name,t1.time
ORDER BY position;

-- above query = 361 tps

-- Vladimir's method.
-- edit postgresql.conf -> custom_variable_classes = 'public'
-- init values
select ''>>>'prev_time', '0'>>>'dense_rank';

select runnerid,name,time,position
from (select (((case when time::text = <<<'prev_time' then 0 else 1 end)
               + (<<<'dense_rank')::int4)::text>>>'dense_rank')::int4 as position,
             runnerid,
             time,
             time::text>>>'prev_time',
             name
      from (SELECT * FROM marathon_results ORDER BY time) m
     ) results
ORDER BY position;

-- above query = 182 tps

-- With windowing functions.
SELECT runnerid,name,time,DENSE_RANK() OVER (ORDER BY time ASC) AS position
FROM marathon_results
ORDER BY position;

-- above query = 629 tps
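-- As a sanity check, all four methods above should agree on the following
-- ranking for the sample data. Note how DENSE_RANK handles the tie: David and
-- Eric share position 6 and Alan follows at 7 with no gap (rows tied on time
-- may appear in either order):
--
--  runnerid |  name  |   time   | position
-- ----------+--------+----------+----------
--         3 | Haile  | 02:03:59 |        1
--         2 | Paul   | 02:04:55 |        2
--         5 | Sammy  | 02:04:56 |        3
--         4 | Martin | 02:05:15 |        4
--         1 | Samuel | 02:05:24 |        5
--         6 | David  | 03:15:56 |        6
--         7 | Eric   | 03:15:56 |        6
--         8 | Alan   | 03:24:56 |        7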
------------------------------------------------------------------------------------------
-- Test 5 -- Big marathon problem
--
-- Notes: As above, but with a bigger table.

CREATE TABLE big_marathon (
  runnerid SERIAL NOT NULL PRIMARY KEY,
  time INTERVAL NOT NULL
);

-- Generate 10000 random times from 2 hrs to 4 hrs.
INSERT INTO big_marathon (time)
SELECT '03:00:00'::interval + (CAST(RANDOM() * 3600 AS INT) || ' secs')::INTERVAL
                            - (CAST(RANDOM() * 3600 AS INT) || ' secs')::INTERVAL
FROM generate_series(1,10000);

-- Give the non-windowing queries a chance.
CREATE INDEX big_marathon_time_idx ON big_marathon (time);

VACUUM ANALYZE big_marathon;

-- Who was in Nth place? (using 2nd place here, but it could be anything)

-- Sub query
SELECT runnerid,time,position
FROM (SELECT runnerid,time,(SELECT COUNT(DISTINCT time)+1
                            FROM big_marathon
                            WHERE time < r.time) AS position
      FROM big_marathon r
     ) results
WHERE position = 2;

-- above query = 404851.763 ms (6 mins 44 secs)

-- Self join (big_marathon has no name column, so only runnerid and time are selected)
SELECT t.runnerid,t.time,t.position
FROM (SELECT t1.runnerid,t1.time,COUNT(DISTINCT t2.time) + 1 AS position
      FROM big_marathon t1
      LEFT OUTER JOIN big_marathon t2 ON t2.time < t1.time
      GROUP BY t1.runnerid,t1.time
     ) t
WHERE t.position = 2;

/*
   ERROR:  could not write block 645447 of temporary file: No space left on device
   HINT:  Perhaps out of disk space?

   Perhaps not, but it was REALLY slow anyway.
*/

-- Vladimir's method.
-- edit postgresql.conf -> custom_variable_classes = 'public'
-- init values
select ''>>>'prev_time', '0'>>>'dense_rank';

select runnerid,time,position
from (select (((case when time::text = <<<'prev_time' then 0 else 1 end)
               + (<<<'dense_rank')::int4)::text>>>'dense_rank')::int4 as position,
             runnerid,
             time,
             time::text>>>'prev_time'
      from big_marathon
      order by time
     ) results
where position = 2;

-- above query = 617 ms

-- With windowing functions.
SELECT runnerid,time,position
FROM (SELECT runnerid,time,DENSE_RANK() OVER (ORDER BY time ASC) AS position
      FROM big_marathon
     ) results
WHERE position = 2;

-- above query = 115.932 ms

-- Notes: The windowing function method did not use the index on the time column.

------------------------------------------------------------------------------------------
-- Test 6 -- Stock problem
--
-- Notes: We have a stock table which holds the stock level for each product, and an
--        orders table which contains orders for products and their due dates. We need
--        to know how much stock we have for each order. Stock must be allocated to the
--        order with the earliest due date first; if two orders have the same due date,
--        the one with the lowest orderid takes the stock first.
--        We need to see a list of orders with the orderid, partnum, duedate, orderqty
--        and the remaining stock after the order is fulfilled. If the remaining stock
--        is less than zero we show the negative value rather than 0, to indicate how
--        much stock needs to be bought or made.

CREATE TABLE stock (
  partnum VARCHAR(16) NOT NULL PRIMARY KEY,
  stockqty INT NOT NULL,
  CHECK (stockqty > 0)
);

CREATE TABLE orders (
  orderid SERIAL NOT NULL PRIMARY KEY,
  partnum VARCHAR(16) NOT NULL,
  duedate DATE NOT NULL,
  orderqty INT NOT NULL,
  CHECK (orderqty > 0)
);

INSERT INTO stock VALUES('CPU',1000);
INSERT INTO stock VALUES('HARDDRIVE',2000);
INSERT INTO stock VALUES('KEYBOARD',4000);
INSERT INTO stock VALUES('MOUSE',9000);
INSERT INTO stock VALUES('MONITOR',100);

INSERT INTO orders (partnum,duedate,orderqty) VALUES('CPU','2008-12-22', 700);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('CPU','2008-11-15', 600);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('HARDDRIVE','2008-11-05', 1200);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('HARDDRIVE','2008-11-17', 200);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('KEYBOARD','2008-11-17', 1000);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('KEYBOARD','2008-12-03', 3000);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('MONITOR','2008-12-03', 50);
INSERT INTO orders (partnum,duedate,orderqty) VALUES('MONITOR','2008-12-03', 60);

VACUUM ANALYZE stock;
VACUUM ANALYZE orders;

-- Sub query
SELECT t.orderid,
       t.partnum,
       t.duedate,
       t.orderqty,
       st.stockqty - t.orderqty - COALESCE((SELECT SUM(orderqty)
                                            FROM orders
                                            WHERE partnum = t.partnum
                                            AND (duedate < t.duedate
                                                 OR (duedate = t.duedate
                                                     AND orderid < t.orderid))
                                           ),0) AS remaining
FROM orders t
INNER JOIN stock st ON t.partnum = st.partnum
ORDER BY t.partnum ASC,t.duedate ASC,t.orderid ASC;

-- above query = 253 tps.

-- With windowing functions
SELECT o.orderid,
       o.partnum,
       o.duedate,
       o.orderqty,
       s.stockqty - SUM(o.orderqty) OVER (PARTITION BY o.partnum
                                          ORDER BY o.duedate ASC,o.orderid ASC) AS remaining
FROM orders o
INNER JOIN stock s ON o.partnum = s.partnum
ORDER BY o.partnum ASC,o.duedate ASC,o.orderid ASC;

-- above query = 305 tps.
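-- As a sanity check, both queries above should produce the following for the
-- sample data. MOUSE has no orders, so the inner join drops it, and the two
-- MONITOR rows share a due date, demonstrating the orderid tie-break:
--
--  orderid |  partnum  |  duedate   | orderqty | remaining
-- ---------+-----------+------------+----------+-----------
--        2 | CPU       | 2008-11-15 |      600 |       400
--        1 | CPU       | 2008-12-22 |      700 |      -300
--        3 | HARDDRIVE | 2008-11-05 |     1200 |       800
--        4 | HARDDRIVE | 2008-11-17 |      200 |       600
--        5 | KEYBOARD  | 2008-11-17 |     1000 |      3000
--        6 | KEYBOARD  | 2008-12-03 |     3000 |         0
--        7 | MONITOR   | 2008-12-03 |       50 |        50
--        8 | MONITOR   | 2008-12-03 |       60 |       -10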